This document describes how to go from raw vcftools output of diversity metrics (Fst, pi, and Tajima’s D) to Manhattan plots, including making figures for the manuscript associated with this repo. This script is a result of brute forcing things to work: I am certainly no expert, and thus lots of the notes refer to my own dumb mistakes.

library(tidyverse)
library(qqman)
library(scales)

Prepare files: join vcftools output into a single table.

First, read in raw output files from vcftools (in this repo, see filter-scan.sh for code to generate).

fst.UKUS.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_UKUS_50kb.windowed.weir.fst",sep="\t"))
fst.AUUK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_AUUK_50kb.windowed.weir.fst",sep="\t"))
fst.USAU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_AUUS_50kb.windowed.weir.fst",sep="\t"))
pi.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_UK_pi_50kb.windowed.pi",sep="\t"))
pi.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_AU_pi_50kb.windowed.pi",sep="\t"))
pi.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_US_pi_50kb.windowed.pi",sep="\t"))
TajD.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_UK_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_AU_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_US_TajimaD_50kb.Tajima.D",sep="\t"))

vcftools outputs a few identifies for position: “CHROM,” “BIN_START” and “BIN_END” for .fst and .pi files, but only “CHROM” AND “BIN_START” for .Tajima.D files. Unfortunately, the numbering is off, so we’ll add 1 to every “BIN_START” in the .Tajima.D files.

head(TajD.AU.50kb)
## # A tibble: 6 x 4
##   CHROM BIN_START N_SNPS TajimaD
##   <fct>     <int>  <int>   <dbl>
## 1 10            0    147  0.583 
## 2 10        50000    334  0.372 
## 3 10       100000    301  0.836 
## 4 10       150000     98  1.56  
## 5 10       200000    102 -0.557 
## 6 10       250000    139 -0.0356
head(fst.AUUK.50kb)
## # A tibble: 6 x 6
##   CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST  MEAN_FST
##   <fct>     <int>   <int>      <int>        <dbl>     <dbl>
## 1 10            1   50000        164      0.00792  0.0105  
## 2 10        50001  100000        379      0.0595   0.0500  
## 3 10       100001  150000        311      0.00728 -0.000459
## 4 10       150001  200000        120      0.0162   0.0275  
## 5 10       200001  250000        121      0.126    0.0926  
## 6 10       250001  300000        155      0.109    0.0747
TajD.UK.50kb$BIN_START <- TajD.UK.50kb$BIN_START + 1
TajD.AU.50kb$BIN_START <- TajD.AU.50kb$BIN_START + 1
TajD.US.50kb$BIN_START <- TajD.US.50kb$BIN_START + 1

Now BIN_START should match. To use dplyr, we’ll need a column that specifes a unique position in the genome. Create a new column that joins “CHROM” and “BIN_START” so that we match each value based on actual position in the genome.

fst.UKUS.50kb$POS_ID <- paste(fst.UKUS.50kb$CHROM,fst.UKUS.50kb$BIN_START,sep="-")
fst.AUUK.50kb$POS_ID <- paste(fst.AUUK.50kb$CHROM,fst.AUUK.50kb$BIN_START,sep="-")
fst.USAU.50kb$POS_ID <- paste(fst.USAU.50kb$CHROM,fst.USAU.50kb$BIN_START,sep="-")
pi.UK.50kb$POS_ID <- paste(pi.UK.50kb$CHROM,pi.UK.50kb$BIN_START,sep="-")
pi.AU.50kb$POS_ID <- paste(pi.AU.50kb$CHROM,pi.AU.50kb$BIN_START,sep="-")
pi.US.50kb$POS_ID <- paste(pi.US.50kb$CHROM,pi.US.50kb$BIN_START,sep="-")
TajD.UK.50kb$POS_ID <- paste(TajD.UK.50kb$CHROM,TajD.UK.50kb$BIN_START,sep="-")
TajD.AU.50kb$POS_ID <- paste(TajD.AU.50kb$CHROM,TajD.AU.50kb$BIN_START,sep="-")
TajD.US.50kb$POS_ID <- paste(TajD.US.50kb$CHROM,TajD.US.50kb$BIN_START,sep="-")

Now drop column names that we no longer need, otherwise we’ll have a giant table after all the merging.

fst.AUUK.50kb <- fst.AUUK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
fst.USAU.50kb <- fst.USAU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.UK.50kb <- pi.UK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.AU.50kb <- pi.AU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.US.50kb <- pi.US.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
TajD.UK.50kb <- TajD.UK.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.AU.50kb <- TajD.AU.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.US.50kb <- TajD.US.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)

We’ll join tables based on the unique position identified (POS_ID). The rename() command ensures that each new column header specifies population (new name first). Heads up: each time you join two tables, those two tables are no longer accessible except as a joined table!

fstUKUS.fstAUUK.50kb <- left_join(fst.UKUS.50kb,fst.AUUK.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UKUS","_AUUK"))
fst.50kb <- left_join(fstUKUS.fstAUUK.50kb,fst.USAU.50kb, by = "POS_ID", copy = FALSE, suffix=c("","_USAU"))
fst.50kb <- rename(fst.50kb, WEIGHTED_FST_USAU = WEIGHTED_FST)
fst.50kb <- rename(fst.50kb, MEAN_FST_USAU = MEAN_FST)
fst.50kb.piUK <- left_join(fst.50kb,pi.UK.50kb, by = "POS_ID", copy = FALSE)
fst.50kb.piUK.piUS <- left_join(fst.50kb.piUK,pi.US.50kb, by = "POS_ID", copy = FALSE,suffix=c("_UK","_US"))
fst.pi.50kb <- left_join(fst.50kb.piUK.piUS,pi.AU.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb <- rename(fst.pi.50kb, PI_AU = PI)
fst.pi.50kb.TajDUK <- left_join(fst.pi.50kb,TajD.UK.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb.TajDUK.TajDUS <- left_join(fst.pi.50kb.TajDUK,TajD.US.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UK","_US"))
div <- left_join(fst.pi.50kb.TajDUK.TajDUS,TajD.AU.50kb, by = "POS_ID", copy = FALSE)
div <- rename(div, TajimaD_AU = TajimaD)

We’re going to drop data from: * small scaffolds (which conveniently start with ‘KQ’ or ‘LNCF’, * any rows (positions) with missing data (NA), * and also coerce “negative” FST or pi to zero. We replace POS_ID (which was converted to numeric) with SNP. For the qqman package, chromosomes need to be a numeric value, so we also use lapply() below to rename chromosomes.

div <- filter(div, !grepl('KQ',CHROM))
div <- filter(div, !grepl('LNCF',CHROM))
div <- filter(div, !grepl('Unknown',CHROM))
div <- div %>% drop_na()
div[,c(5:6)][div[,c(5:6)] < 0] <- 0
div[,c(8:14)][div[,c(8:14)] < 0] <- 0
div <- data.frame(lapply(div, function(x) {gsub("1A", "1.25", x)}))
div <- data.frame(lapply(div, function(x) {gsub("1B", "1.75", x)}))
div <- data.frame(lapply(div, function(x) {gsub("4A", "4.5", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LG5", "28", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LGE22", "29", x)}))
div <- data.frame(lapply(div, function(x) {gsub("Z", "0", x)}))
indx <- sapply(div, is.factor)
div[indx] <- lapply(div[indx], function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
div <- div %>% select(-POS_ID)
div$SNP <- seq.int(nrow(div))
str(div)
## 'data.frame':    20071 obs. of  17 variables:
##  $ CHROM            : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ BIN_START        : num  1 50001 100001 150001 200001 ...
##  $ BIN_END          : num  50000 100000 150000 200000 250000 300000 350000 400000 450000 500000 ...
##  $ N_VARIANTS       : num  165 377 311 120 121 156 80 101 24 18 ...
##  $ WEIGHTED_FST_UKUS: num  0.01745 0.03751 0 0 0.00586 ...
##  $ MEAN_FST_UKUS    : num  0.0133 0.0272 0 0 0 ...
##  $ WEIGHTED_FST_AUUK: num  0.00792 0.05948 0.00728 0.01624 0.12564 ...
##  $ MEAN_FST_AUUK    : num  0.0105 0.05 0 0.0275 0.0926 ...
##  $ WEIGHTED_FST_USAU: num  0.01291 0.00258 0 0 0.00315 ...
##  $ MEAN_FST_USAU    : num  0.0093 0.00585 0 0 0 ...
##  $ PI_UK            : num  0.001053 0.002189 0.002212 0.000915 0.000879 ...
##  $ PI_US            : num  0.001063 0.002373 0.002003 0.000829 0.0007 ...
##  $ PI_AU            : num  0.001024 0.002211 0.0022 0.000804 0.000532 ...
##  $ TajimaD_UK       : num  0.808 0.293 0.877 1.426 0.968 ...
##  $ TajimaD_US       : num  0.42348 0.58012 0.80239 1.05163 -0.00424 ...
##  $ TajimaD_AU       : num  0.583 0.372 0.836 1.557 -0.557 ...
##  $ SNP              : int  1 2 3 4 5 6 7 8 9 10 ...

Adding in a calculation here so that we don’t have to repeat it when filtering div table.

div$piUK.piAU <- div$PI_UK - div$PI_AU
div$piUK.piUS <- div$PI_UK - div$PI_US

Now we’re ready to plot and calculate genome-wide values!

Exploring genetic variation

First, we look at the distribution of variation across the genome.

Manhattan plots

manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.UKUS.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen 
##                 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.AUUK.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen 
##                 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.USAU.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen 
##                 2

Identifying outliers

quantile(div$WEIGHTED_FST_AUUK, c(.9,.95,.99,.999)) 
##       90%       95%       99%     99.9% 
## 0.0676462 0.0862444 0.1490122 0.3077638
quantile(div$WEIGHTED_FST_UKUS, c(.9,.95,.99,.999)) 
##       90%       95%       99%     99.9% 
## 0.0441469 0.0609766 0.1156152 0.2228447
mean(div$WEIGHTED_FST_AUUK) + 5*sd(div$WEIGHTED_FST_AUUK)
## [1] 0.1936444
mean(div$WEIGHTED_FST_UKUS) + 5*sd(div$WEIGHTED_FST_UKUS)
## [1] 0.1406712
div.outliers.AUUK <- div[which(div$WEIGHTED_FST_AUUK > quantile(div$WEIGHTED_FST_AUUK,.99)),]
div.outliers.USUK <- div[which(div$WEIGHTED_FST_UKUS > quantile(div$WEIGHTED_FST_UKUS,.99)),]

div.hifst.AUUK <- div[which(div$WEIGHTED_FST_AUUK > 0.1),]
div.hifst.UKUS <- div[which(div$WEIGHTED_FST_UKUS > 0.1),]

unique(div.outliers.USUK$CHROM)
##  [1] 11.00 12.00 13.00 17.00 19.00  1.25  1.00 28.00  2.00  3.00  4.50  4.00
## [13]  6.00 29.00  0.00
length(div.outliers.USUK$SNP)
## [1] 201
write.csv(div.outliers.USUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.USUK.csv")

unique(div.outliers.AUUK$CHROM)
##  [1] 10.00 12.00 13.00 17.00 18.00  1.25  1.00 23.00 27.00  2.00  3.00  4.50
## [13]  4.00  5.00  6.00  7.00  8.00  0.00
length(div.outliers.AUUK$SNP)
## [1] 201
write.csv(div.outliers.AUUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.AUUK.csv")

What’s going on w/ other metrics at these outliers?

summary(div.outliers.AUUK$PI_UK)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000025 0.0000730 0.0001890 0.0009734 0.0014188 0.0057224
summary(div.outliers.AUUK$PI_AU)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.383e-05 7.067e-05 2.285e-04 9.324e-04 1.447e-03 5.313e-03
summary(div.outliers.AUUK$TajimaD_AU)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.3050  0.2574  0.7501  0.6479  1.2528  2.6922
summary(div.outliers.AUUK$TajimaD_UK)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.2937  0.1481  0.6493  0.5926  1.0098  2.3588
summary(div.outliers.USUK$PI_US)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 7.167e-06 6.250e-05 2.890e-04 8.204e-04 1.281e-03 5.202e-03
summary(div.outliers.USUK$PI_UK)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000210 0.0000765 0.0002912 0.0008650 0.0012335 0.0058409
summary(div.outliers.USUK$TajimaD_US)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.1488 -0.4076  0.4343  0.3082  0.9422  2.9728
summary(div.outliers.USUK$TajimaD_UK)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.9252  0.3047  0.8963  0.8645  1.4612  2.5895
library(fitdistrplus)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: survival
descdist(div.outliers.AUUK$PI_UK)

## summary statistics
## ------
## min:  2.5e-06   max:  0.00572238 
## median:  0.000189 
## mean:  0.0009734123 
## estimated sd:  0.001348425 
## estimated skewness:  1.597318 
## estimated kurtosis:  4.653
descdist(div.outliers.AUUK$PI_AU)

## summary statistics
## ------
## min:  1.38333e-05   max:  0.00531275 
## median:  0.0002285 
## mean:  0.0009324382 
## estimated sd:  0.001252321 
## estimated skewness:  1.532558 
## estimated kurtosis:  4.458218
descdist(div.outliers.USUK$PI_UK)

## summary statistics
## ------
## min:  2.1e-05   max:  0.00584092 
## median:  0.000291167 
## mean:  0.000865046 
## estimated sd:  0.001150262 
## estimated skewness:  1.693987 
## estimated kurtosis:  5.349374
descdist(div.outliers.USUK$PI_US)

## summary statistics
## ------
## min:  7.16667e-06   max:  0.00520175 
## median:  0.000289007 
## mean:  0.0008203641 
## estimated sd:  0.001078227 
## estimated skewness:  1.65203 
## estimated kurtosis:  5.131592
#beta.outliers.AUUK <- fitdist(div.outliers.AUUK$PI_AU, "beta")
#summary(beta.outliers.AUUK)

#beta.outliers.USUK <- fitdist(div.outliers.USUK$PI_US, "beta")
#summary(beta.outliers.USUK)
div.outliers.AUUK.lowFstUSAU <- div.outliers.AUUK[which(div.outliers.AUUK$WEIGHTED_FST_USAU < 0.01 ),] 
unique(div.outliers.AUUK.lowFstUSAU$CHROM)
## [1] 10.00 12.00 13.00 17.00  1.25  4.50  6.00  0.00
length(div.outliers.AUUK.lowFstUSAU$SNP)
## [1] 21
div.outliers.USUK.lowFstUSAU <- div.outliers.USUK[which(div.outliers.USUK$WEIGHTED_FST_USAU < 0.01 ),] 
unique(div.outliers.USUK.lowFstUSAU$CHROM)
## [1] 12.00  1.25  1.00  4.50  4.00  6.00  0.00
length(div.outliers.USUK.lowFstUSAU$SNP)
## [1] 39
intersect(unique(div.outliers.AUUK.lowFstUSAU$CHROM),unique(div.outliers.USUK.lowFstUSAU$CHROM))
## [1] 12.00  1.25  4.50  6.00  0.00

Possible parallel “selection” ?

Histograms of FST, pi

What’s the statistical distribution of these values?

descdist(div$WEIGHTED_FST_AUUK)

## summary statistics
## ------
## min:  0   max:  0.401902 
## median:  0.0265902 
## mean:  0.03258447 
## estimated sd:  0.03221199 
## estimated skewness:  2.902543 
## estimated kurtosis:  19.80308
descdist(div$WEIGHTED_FST_UKUS)

## summary statistics
## ------
## min:  0   max:  0.34149 
## median:  0.0125022 
## mean:  0.01873338 
## estimated sd:  0.02438756 
## estimated skewness:  3.325395 
## estimated kurtosis:  22.98486
descdist(div$WEIGHTED_FST_USAU)

## summary statistics
## ------
## min:  0   max:  0.482831 
## median:  0.0341419 
## mean:  0.04037524 
## estimated sd:  0.03744596 
## estimated skewness:  3.350824 
## estimated kurtosis:  25.99446
lab.AU <- rep("AU.UK",length(div$WEIGHTED_FST_AUUK))
lab.US <- rep("UK.US",length(div$WEIGHTED_FST_UKUS))
Fst.group <- c(lab.AU,lab.US)
Fst.hist.data <- c(div$WEIGHTED_FST_AUUK,div$WEIGHTED_FST_USUK)
Fst.hist <- data.frame(Fst = Fst.hist.data, population = Fst.group)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Fst.pdf",width=4,height=3)
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
  theme_classic() +
  geom_density(alpha=0.5,lwd=0.5) +
  scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) + 
  xlab("Fst") + labs(fill="Population") + 
  geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)
dev.off()
## quartz_off_screen 
##                 2
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
  theme_classic() +
  geom_density(alpha=0.5,lwd=0.5) +
  scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) + 
  xlab("Fst") + labs(fill="Population") + 
  geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)

ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
  xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.AUUK.pdf",height=2,width=2)
ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
  xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
summary(div$PI_AU)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000025 0.0027682 0.0040090 0.0038645 0.0050565 0.0138998
summary(div$PI_UK)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000025 0.0028343 0.0041312 0.0039757 0.0051943 0.0141869
summary(div$PI_US)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 4.667e-06 2.753e-03 4.016e-03 3.863e-03 5.045e-03 1.409e-02
descdist(div$PI_US)

## summary statistics
## ------
## min:  4.66667e-06   max:  0.0140869 
## median:  0.00401555 
## mean:  0.003862564 
## estimated sd:  0.001783127 
## estimated skewness:  -0.1258447 
## estimated kurtosis:  3.05688
#norm.piUS.fit <- fitdist(div$PI_US,distr="norm")
#summary(norm.piUS.fit)

descdist(div$PI_UK)

## summary statistics
## ------
## min:  2.5e-06   max:  0.0141869 
## median:  0.00413121 
## mean:  0.003975727 
## estimated sd:  0.001837387 
## estimated skewness:  -0.1188124 
## estimated kurtosis:  3.055468
#norm.piUK.fit <- fitdist(div$PI_UK,distr="norm")
#summary(norm.piUK.fit)

descdist(div$PI_AU)

## summary statistics
## ------
## min:  2.5e-06   max:  0.0138998 
## median:  0.00400898 
## mean:  0.003864496 
## estimated sd:  0.001775347 
## estimated skewness:  -0.1396863 
## estimated kurtosis:  3.030743
#norm.piAU.fit <- fitdist(div$PI_AU,distr="norm")
#summary(norm.piAU.fit)

lab.AU <- rep("AU",length(div$PI_AU))
lab.US <- rep("US",length(div$PI_US))
lab.UK <- rep("UK",length(div$PI_UK))
group <- c(lab.AU,lab.US,lab.UK)
pi.hist.data <- c(div$PI_UK,div$PI_US,div$PI_AU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame':    60213 obs. of  2 variables:
##  $ pi        : num  0.001053 0.002189 0.002212 0.000915 0.000879 ...
##  $ population: Factor w/ 3 levels "AU","UK","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) + 
  xlab("Pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  theme(legend.position="none")

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Pi.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) + 
  xlab("Pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  theme(legend.position="none")
dev.off()
## quartz_off_screen 
##                 2

Average nucleotide diversity for both invasions is the same (0.003). There are two vertical lines overlaid in the plot above.

ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_USvsUK.pdf",width=2,height=2)
ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div.outliers.USUK) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/PiOutliers_USvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.USUK) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div.outliers.AUUK) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).

dev.off()
## null device 
##           1
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.AUUK) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
## null device 
##           1
descdist(div$TajimaD_US)

## summary statistics
## ------
## min:  -2.1488   max:  3.16509 
## median:  0.744616 
## mean:  0.7115798 
## estimated sd:  0.3466121 
## estimated skewness:  -1.309557 
## estimated kurtosis:  13.47233
descdist(div$TajimaD_UK)

## summary statistics
## ------
## min:  -2.2276   max:  3.23445 
## median:  0.737875 
## mean:  0.7152536 
## estimated sd:  0.3263563 
## estimated skewness:  -0.4741872 
## estimated kurtosis:  12.17869
descdist(div$TajimaD_AU)

## summary statistics
## ------
## min:  -2.33194   max:  3.16824 
## median:  0.798065 
## mean:  0.7829885 
## estimated sd:  0.3372602 
## estimated skewness:  -0.5499719 
## estimated kurtosis:  12.87531
descdist(div.outliers.USUK$TajimaD_US)

## summary statistics
## ------
## min:  -2.1488   max:  2.9728 
## median:  0.434343 
## mean:  0.3082123 
## estimated sd:  1.017184 
## estimated skewness:  -0.2567428 
## estimated kurtosis:  2.728269
descdist(div.outliers.AUUK$TajimaD_AU)

## summary statistics
## ------
## min:  -2.30499   max:  2.69219 
## median:  0.750086 
## mean:  0.6478708 
## estimated sd:  0.8718095 
## estimated skewness:  -0.7820734 
## estimated kurtosis:  3.98327

Step 4: What’s the impact of the genetic bottlenecks and subsequent expansion on genetic diversity in each invasion?

What’s the difference in diversity between native and invasive ranges?

When comparing ancestral diversity to a bottlenecked invasive population, we’d expect to see lower diversity in the invasive range overall. This difference would be more pronounced in regions that had differentiated from the native range (e.g., where drift and/or selection are more pronounced).

Here, difference in pi > 0 means that diversity is higher in the native range than in the invasive.

summary(div$piUK.piAU)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -1.552e-03 -1.935e-05  9.278e-05  1.112e-04  2.319e-04  2.028e-03
qqnorm(div$piUK.piAU, pch = 16)
qqline(div$piUK.piAU, pch = 16)

descdist(div$piUK.piAU)

## summary statistics
## ------
## min:  -0.00155244   max:  0.00202796 
## median:  9.278e-05 
## mean:  0.0001112317 
## estimated sd:  0.0002326892 
## estimated skewness:  0.4570582 
## estimated kurtosis:  5.950369
summary(div$piUK.piUS)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -1.381e-03 -2.535e-06  1.035e-04  1.132e-04  2.277e-04  1.762e-03
qqnorm(div$piUK.piUS, pch = 16)
qqline(div$piUK.piUS, pch = 16)

descdist(div$piUK.piUS)

## summary statistics
## ------
## min:  -0.00138117   max:  0.00176181 
## median:  0.000103491 
## mean:  0.0001131636 
## estimated sd:  0.0002065841 
## estimated skewness:  0.1527603 
## estimated kurtosis:  5.842408
div.hiUKpi.vsAU <- div[which(div$piUK.piAU > 0),]
div.hiUKpi.vsUS <- div[which(div$piUK.piUS > 0),]
div.hiUKpi.both <- div.hiUKpi.vsAU[which(div$piUK.piUS > 0),]

length(div.hiUKpi.vsAU$piUK.piAU)/length(div$piUK.piAU) # % of windows that have higher pi in native
## [1] 0.7049973
length(div.hiUKpi.vsUS$piUK.piUS)/length(div$piUK.piUS) 
## [1] 0.7432116
length(div.hiUKpi.both$piUK.piUS)/length(div$piUK.piUS) # % of windows with higher pi in native for both invasive ranges
## [1] 0.7432116

If drift acting on novel mutations explains most of the differentiation, then where FST is high, diversity should also be higher in the invasions. Could also be due to weaker purifying selection during population expansion.

descdist(div.hifst.AUUK$piUK.piAU)

## summary statistics
## ------
## min:  -0.00119697   max:  0.00202796 
## median:  3.283325e-05 
## mean:  0.0001246263 
## estimated sd:  0.0004012797 
## estimated skewness:  0.807314 
## estimated kurtosis:  4.950103
div.hifst.AUUK.hiUKpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU > 0),]
length(div.hifst.AUUK.hiUKpi$piUK.piAU)/length(div.hifst.AUUK$piUK.piAU) # % of high FST windows that have higher pi in native
## [1] 0.6224189
descdist(div.hifst.UKUS$piUK.piUS)

## summary statistics
## ------
## min:  -0.00119758   max:  0.00176181 
## median:  2.5496e-05 
## mean:  6.588734e-05 
## estimated sd:  0.0003507603 
## estimated skewness:  0.6636127 
## estimated kurtosis:  6.832871
div.hifst.UKUS.hiUKpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS > 0),]
length(div.hifst.UKUS.hiUKpi$piUK.piUS)/length(div.hifst.UKUS$piUK.piUS) 
## [1] 0.6989619
div.hiUKpi.both <- div.hifst.UKUS.hiUKpi[which(div.hifst.AUUK.hiUKpi$piUK.piUS > 0),]
length(div.hiUKpi.both$piUK.piUS)
## [1] 295
div.hifst.AUUK.hiAUpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU < 0),]
div.hifst.UKUS.hiUSpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS < 0),]

Now to plot these differences.

lab.AU <- rep("AU",length(div$piUK.piAU))
lab.US <- rep("US",length(div$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div$piUK.piUS,div$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame':    40142 obs. of  2 variables:
##  $ pi        : num  -1.04e-05 -1.83e-04 2.09e-04 8.63e-05 1.79e-04 ...
##  $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")
dev.off()
## quartz_off_screen 
##                 2

Plotting high-fst (FST > 0.1) windows only

lab.AU <- rep("AU",length(div.hifst.AUUK$piUK.piAU))
lab.US <- rep("US",length(div.hifst.UKUS$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div.hifst.UKUS$piUK.piUS,div.hifst.AUUK$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame':    967 obs. of  2 variables:
##  $ pi        : num  1.88e-04 8.97e-05 1.13e-04 4.36e-04 2.19e-04 ...
##  $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference_HiFst.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")
dev.off()
## quartz_off_screen 
##                 2

Are regions with novel pi also highly differentiated? Expect this scatterplot to look bimodal, where shifts in diversity in either direction led to differentiation between populations.

ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(-0.002,0.002) + theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(-0.002,0.002) + theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
  xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
  geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
  geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
  guides(col = guide_legend(label = TRUE, label.position = "bottom", 
                           direction = "horizontal"))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.pdf",height=5,width=5)
ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
  xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
  geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
  geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
  guides(col = guide_legend(label = TRUE, label.position = "bottom", 
                           direction = "horizontal"))
dev.off()
## quartz_off_screen 
##                 2
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)

#dev.off()

In a given window, if diversity in the invasive range is higher than that of the native range, it is possible that those variants are novel mutations. This filtering will tell us whether we should look at particular genotypes in these regions.

div.novelpi <- div %>% 
  filter((div$piUK.piAU < 0) & (div$piUK.piUS < 0))

# % of low native diversity SNPs are higher in diversity in both invasions
# sanity check based on calc above
length(div.novelpi$SNP)/length(div$SNP)
## [1] 0.1386578
div.novelUSpi <- div %>%
    filter(div$piUK.piUS < 0)
length(div.novelUSpi$SNP)/length(div$SNP)
## [1] 0.2566888
div.novelAUpi <- div %>%
    filter(div$piUK.piAU < 0)
length(div.novelAUpi$SNP)/length(div$SNP)
## [1] 0.2948533

How to test whether novel pi is evenly distributed across the genome? If even, then we expect to see a random sampling of chromosomes represented in this smaller dataset.

unique(div.novelpi$CHROM) # which chromosomes have higher invasive pi in both invasions
##  [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00  1.25  1.75  1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00  2.00  3.00  4.50
## [25]  4.00  5.00  6.00  7.00  8.00  9.00 29.00  0.00
unique(div.novelUSpi$CHROM) # just in US
##  [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00  1.25  1.75  1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00  2.00  3.00  4.50
## [25]  4.00  5.00  6.00  7.00  8.00  9.00 29.00  0.00
unique(div.novelAUpi$CHROM) # in AU
##  [1] 10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 18.00 19.00  1.25  1.75
## [13]  1.00 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00  2.00  3.00
## [25]  4.50  4.00  5.00  6.00  7.00  8.00  9.00 29.00  0.00
length(unique(div.novelpi$CHROM))/length(unique(div$CHROM))
## [1] 0.969697
length(unique(div.novelUSpi$SNP)) # are most of these higher invasive pi regions also the moderately differentiated windows
## [1] 5152
length(unique(div.hifst.UKUS$SNP))
## [1] 289
length(unique(div.hifst.UKUS$SNP))/length(unique(div.novelUSpi$SNP))
## [1] 0.05609472
length(unique(div.novelAUpi$SNP))
## [1] 5918
length(unique(div.hifst.AUUK$SNP))
## [1] 678
length(unique(div.hifst.AUUK$SNP))/length(unique(div.novelAUpi$SNP))
## [1] 0.1145657

This is a different calculation than asking whether differentiated windows are also high in invasive diversity, since we’re drawing from different sets.

Can we color points in the manhattan plot by diff in diversity

SNP.novelUSpi <- div.novelUSpi$SNP
SNP.novelAUpi <- div.novelAUpi$SNP

SNP.novelAUpi.hifstonly <- div.hifst.AUUK.hiAUpi$SNP
SNP.novelUSpi.hifstonly <- div.hifst.UKUS.hiUSpi$SNP

length(div.hifst.UKUS.hiUSpi$SNP) 
## [1] 87
length(div.hifst.AUUK.hiAUpi$SNP)
## [1] 256
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.UKUS.Manhattan.pdf",w=12,h=3)

#dev.off()

manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.AUUK.Manhattan.pdf",w=12,h=3)

#dev.off()

What’s going on at outlier windows in particular?

div.outliers.hiAUpi <- div.outliers.AUUK[which(div.outliers.AUUK$piUK.piAU < 0),]

length(div.outliers.hiAUpi$SNP)
## [1] 91
length(div.outliers.hiAUpi$SNP)/length(div.outliers.AUUK$SNP)
## [1] 0.4527363

% of FST outlier windows have higher diversity in AU

div.outliers.hiUSpi <- div.outliers.USUK[which(div.outliers.USUK$piUK.piUS < 0),]

length(div.outliers.hiUSpi$SNP) 
## [1] 61
length(div.outliers.hiUSpi$SNP)/length(div.outliers.USUK$SNP)
## [1] 0.3034826

% of FST outlier windows have higher diversity in US

intersect(div.outliers.hiUSpi$CHROM,div.outliers.hiAUpi$CHROM)
## [1] 12.00  1.25  1.00  2.00  3.00  6.00  0.00
unique(div.outliers.hiAUpi$CHROM)
##  [1] 12.00 13.00  1.25  1.00 23.00 27.00  2.00  3.00  4.50  5.00  6.00  0.00
unique(div.outliers.hiUSpi$CHROM) 
## [1] 12.00 17.00  1.25  1.00  2.00  3.00  4.00  6.00  0.00

Figures: Diversity underlying candidate peaks

The plots below are based on code from Gemma Clucas.

Chromosome 2

chrom2.div <- div[which(div$CHROM==2),]
length(chrom2.div$SNP) # how many windows total (need for calculating distance to centromere)
## [1] 2997
manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom2.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom2.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom2.div.small <- chrom2.div[which(chrom2.div$SNP < 8980),]
chrom2.div.small <- chrom2.div.small[which(chrom2.div.small$SNP > 8600),]
# 39200001 to 51650000

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1)) # set rows
par(mar=c(0,2,0.5,2)) # set margins for each plot
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.5))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.04))
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, col="#39C855", lwd=1)
abline(v=76290000, col="black", lwd=0.5) # centromere position
par(mar=c(1,2,1,2))
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.2))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.2))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, type="n", xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.2))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.2)) # tajima's D axis
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome2.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 6

chrom6.div <- div[which(div$CHROM==6),]
# runs from 16155 to 16871
# chrom 6 = 716 50kb windows ("SNPs" here)

manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom6.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom6.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom6.div.small <- chrom6.div[which(chrom6.div$SNP < 16325),]
chrom6.div.small <- chrom6.div.small[which(chrom6.div.small$SNP > 16225),]

head(chrom6.div.small)
##       CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 16226     6   3550001 3600000        570        0.01206770   0.008669440
## 16227     6   3600001 3650000        528        0.03079550   0.023766500
## 16228     6   3650001 3700000        701        0.02597620   0.020965100
## 16229     6   3700001 3750000        805        0.00338468   0.000400509
## 16230     6   3750001 3800000        830        0.02082790   0.017915200
## 16231     6   3800001 3850000        800        0.00589529   0.001372410
##       WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 16226         0.0367649     0.0305788         0.0392762     0.0368076
## 16227         0.0501026     0.0406660         0.0739191     0.0613789
## 16228         0.0711590     0.0593933         0.0454643     0.0368937
## 16229         0.0754966     0.0604820         0.0708642     0.0562380
## 16230         0.0375712     0.0322166         0.0438655     0.0358050
## 16231         0.1001270     0.0672706         0.0743417     0.0579428
##            PI_UK      PI_US      PI_AU TajimaD_UK TajimaD_US TajimaD_AU   SNP
## 16226 0.00366513 0.00384209 0.00372947   0.592114   0.972481   0.785959 16226
## 16227 0.00349155 0.00329177 0.00364804   0.682877   0.782069   0.892366 16227
## 16228 0.00468438 0.00458936 0.00478475   0.859767   0.784092   0.972961 16228
## 16229 0.00535166 0.00537373 0.00535428   0.706416   0.819633   0.873451 16229
## 16230 0.00555363 0.00537467 0.00539254   0.840503   0.753410   0.770190 16230
## 16231 0.00531258 0.00519576 0.00516348   0.695610   0.790285   0.812897 16231
##         piUK.piAU   piUK.piUS
## 16226 -0.00006434 -0.00017696
## 16227 -0.00015649  0.00019978
## 16228 -0.00010037  0.00009502
## 16229 -0.00000262 -0.00002207
## 16230  0.00016109  0.00017896
## 16231  0.00014910  0.00011682
chrom6.div.hifst <- chrom6.div[which(chrom6.div$SNP < 16281),]
chrom6.div.hifst <- chrom6.div.small[which(chrom6.div.small$SNP > 16263),]

# AUUK high fst: window 5350001 to 6300001 on Chrom 6
# "SNP" 16263 to 16281

mean(chrom6.div.hifst$PI_UK) 
## [1] 0.002873224
mean(chrom6.div.hifst$PI_US) 
## [1] 0.002896161
mean(chrom6.div.hifst$PI_AU) 
## [1] 0.00293987
chrom6.div.med <- chrom6.div[which(chrom6.div$SNP < 16450),]
chrom6.div.med <- chrom6.div.med[which(chrom6.div.med$SNP > 16155),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, type="n", xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2
quartz(height=3,width=7)
options(scipen=999)
par(new=T)
## Warning in par(new = T): calling par(new=TRUE) with no plot
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.01,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.BroaderFSTaroundPeak.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 1

chrom1.div <- div[which(div$CHROM==1),]
length(chrom1.div$SNP)
## [1] 2279
manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom1.div.small <- chrom1.div[which(chrom1.div$SNP < 6700),]
chrom1.div.small <- chrom1.div.small[which(chrom1.div.small$SNP > 6400),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 1A

chrom1A.div <- div[which(div$CHROM==1.25),]
# 2896 to 4342
# 1446 windows, 3 ticks


manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

chrom1A.div.small <- chrom1A.div[which(chrom1A.div$SNP < 4000),]
chrom1A.div.small <- chrom1A.div.small[which(chrom1A.div.small$SNP > 3700),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 4

chrom4.div <- div[which(div$CHROM==4),]
# 13506 to 14923,
# 1417 windows, 7 ticks - peak ~500 windows from start 


manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom4.div.small <- chrom4.div[which(chrom4.div$SNP < 14150),]
chrom4.div.small <- chrom4.div.small[which(chrom4.div.small$SNP > 13850),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, col="#39C855", lwd=1)

axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 4A

chrom4A.div <- div[which(div$CHROM==4.5),]
# 13097 to 13505
# 408 windows, 4 ticks

manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

chrom4A.div.small <- chrom4A.div[which(chrom4A.div$SNP < 13300),]
chrom4A.div.small <- chrom4A.div.small[which(chrom4A.div.small$SNP > 13150),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2